# Lookup table for the HTS groupings (see https://hts.usitc.gov/ for details).
# Each row gives an inclusive numeric range (min..max), the Roman numeral used
# as the grouping label (hts_chapters) and the grouping's full name.
# We need this because several of the chapter-based price indices are published
# for a whole grouping, so an HS-02 code has to be mapped to its grouping first.
hts_chapters_map <- tribble(
  ~min, ~max, ~hts_chapters, ~hts_chapters_name,
  1,  5,  "I",     "Animal products",
  6,  14, "II",    "Vegetable products",
  15, 15, "III",   "Animal or vegetable fats and oils",
  16, 24, "IV",    "Prepared foodstuffs, beverages, and tobacco",
  25, 27, "V",     "Mineral products",
  28, 38, "VI",    "Products of the chemical or allied industries",
  39, 40, "VII",   "Plastics and articles thereof; rubber and articles thereof",
  41, 43, "VIII",  "Hides and skins",
  44, 46, "IX",    "Wood, wood charcoal, cork, straw, basketware and wickerwork",
  47, 49, "X",     "Woodpulp, recovered paper, and paper products",
  50, 63, "XI",    "Textiles and clothing",
  64, 67, "XII",   "Footwear",
  68, 70, "XIII",  "Stone and glass",
  71, 71, "XIV",   "Pearls, stones, precious metals, imitation jewelry, and coins",
  72, 83, "XV",    "Base metals and articles of base metals",
  84, 85, "XVI",   "Machinery",
  86, 89, "XVII",  "Transport equipment",
  90, 92, "XVIII", "Optical and instruments",
  93, 93, "XIX",   "Arms and Ammunition, parts and accessories thereof",
  94, 96, "XX",    "Miscellaneous manufactured articles",
  97, 97, "XXI",   "Work of art, collectors' pieces and antiques",
  98, 99, "XXII",  "Special Classification Provisions; Temporary Legislation; Temporary Modifications Proclaimed pursuant to Trade Agreements Legislation; Additional Import Restrictions Proclaimed Pursuant to Section 22 of the Agricultural Adjustment Act, As Amended"
)

# Look up the HTS chapter label (Roman numeral) for one or more HS-02 codes.
#
# Args:
#   hs02: HS-02 code(s), numeric or character (e.g. 7, "07", "84").
#
# Returns:
#   A character vector the same length as `hs02`: the `hts_chapters` label of
#   the row of `hts_chapters_map` whose [min, max] range contains the code, or
#   NA_character_ when the code is missing, unparseable, or outside all ranges.
find_hts_chapters <- function(hs02) {
  # Compare numerically: a character code such as "10" would otherwise be
  # compared as text against min/max and fall into the wrong range.
  # Codes that cannot be parsed become NA and are reported as unmatched.
  codes <- suppressWarnings(as.numeric(as.character(hs02)))

  lower <- hts_chapters_map$min
  upper <- hts_chapters_map$max
  labels <- as.character(hts_chapters_map$hts_chapters)

  vapply(
    codes,
    function(code) {
      # which() ignores NA comparisons, so an NA code yields no hit
      hit <- which(code >= lower & code <= upper)
      if (length(hit) == 0) NA_character_ else labels[hit[1]]
    },
    character(1),
    USE.NAMES = FALSE
  )
}

# Tag every import and export row with the HTS chapter of its HS-02 code.
# vapply() pins the new column to character even when the data have zero rows
# or no code matches (sapply() would yield a list or a logical column there);
# as.character(...)[1] turns the lookup result, including a bare NA, into
# exactly one string.
IM_data_1 <- IM_data %>%
  mutate(
    hts_chapters = vapply(
      hs02,
      function(code) as.character(find_hts_chapters(code))[1],
      character(1)
    )
  )
EX_data_1 <- EX_data %>%
  mutate(
    hts_chapters = vapply(
      hs02,
      function(code) as.character(find_hts_chapters(code))[1],
      character(1)
    )
  )



# Now that the trade data are ready, we can merge the price index data into them.
# hs02_2024 and hs02_2025 are the HS-02 level price indices,
# while hts_chapters_2024 and hts_chapters_2025 are the price indices of the corresponding HTS chapters.
# For each HS-02 level data point, we want at least one of the following pieces of information, in order of preference:
# the price index for this HS-02 category, the price index for the HTS chapter it belongs to, or the overall export/import price index as a proxy.


# Attach price indices to HS-02 level trade data and choose one index per row.
#
# Args:
#   trade_data:            trade rows; must contain `month`, `hs02` and
#                          `hts_chapters`.
#   hs02_index_processed:  index table with `month`, `hs02`, `hs02_2024`,
#                          `hs02_2025`.
#   hts_index_processed:   index table with `month`, `hts_chapters`,
#                          `hts_chapters_2024`, `hts_chapters_2025`.
#   total_index_processed: index table with `month`, `index_total_2024`,
#                          `index_total_2025`.
#
# Returns:
#   `trade_data` (with `hs02` and `hts_chapters` as character) plus the joined
#   index columns and three derived columns:
#     calc_2024, calc_2025 - the indices used for the price adjustment of the row
#     index_source         - "hs02", "hts_chapters", or NA when the total
#                            import/export index is used as the proxy
hs_match <- function(trade_data, hs02_index_processed, hts_index_processed, total_index_processed) {

  # Join keys are compared as character on both sides of each join.
  # NOTE(review): keys must be formatted identically in both tables (e.g. "1"
  # vs "01" will not match and the row silently falls back) - confirm upstream.
  hs02_index <- hs02_index_processed %>%
    mutate(hs02 = as.character(hs02)) %>%
    select(month, hs02, hs02_2024, hs02_2025)

  hts_index <- hts_index_processed %>%
    mutate(hts_chapters = as.character(hts_chapters)) %>%
    select(month, hts_chapters, hts_chapters_2024, hts_chapters_2025)

  total_index <- total_index_processed %>%
    select(month, index_total_2024, index_total_2025)

  trade_data %>%
    mutate(
      hs02 = as.character(hs02),
      hts_chapters = as.character(hts_chapters)
    ) %>%
    # join by month + hs02 / hts chapters / month only
    left_join(hs02_index, by = c("month", "hs02")) %>%
    left_join(hts_index, by = c("month", "hts_chapters")) %>%
    left_join(total_index, by = c("month")) %>%
    mutate(
      # An index level is usable only if it is available for BOTH years.
      # These two helper flags are dropped again before returning.
      .use_hs02 = !is.na(hs02_2024) & !is.na(hs02_2025),
      .use_hts = !is.na(hts_chapters_2024) & !is.na(hts_chapters_2025),

      # case_when() takes the first matching branch, so the order below is the
      # fallback order: HS-02 index, then HTS chapter index, then total index.
      calc_2024 = case_when(
        .use_hs02 ~ hs02_2024,
        .use_hts ~ hts_chapters_2024,
        TRUE ~ index_total_2024
      ),
      calc_2025 = case_when(
        .use_hs02 ~ hs02_2025,
        .use_hts ~ hts_chapters_2025,
        TRUE ~ index_total_2025
      ),
      index_source = case_when(
        .use_hs02 ~ "hs02",
        .use_hts ~ "hts_chapters",
        # typed NA keeps the column character on every dplyr version
        TRUE ~ NA_character_
      )
    ) %>%
    select(-.use_hs02, -.use_hts)
}


# Series used as the overall proxy index:
#   total imports price index: EIUIR
#   total exports price index: EIUIQ

# Imports: merge the HS-02, HTS chapter and total import price indices
IM_data_processed <- hs_match(
  trade_data = IM_data_1,
  hs02_index_processed = hs02_im_index_processed,
  hts_index_processed = hts_im_index_processed,
  total_index_processed = total_im_index_processed
)

# Exports: merge the HS-02, HTS chapter and total export price indices
EX_data_processed <- hs_match(
  trade_data = EX_data_1,
  hs02_index_processed = hs02_ex_index_processed,
  hts_index_processed = hts_ex_index_processed,
  total_index_processed = total_ex_index_processed
)


## Generate cumulative tariff revenue as a percent of cumulative imports
## This step builds the per-hs02 totals of cald and cifval for 2024 and 2025;
## the percentage itself is not computed here.
## NOTE(review): cald / cifval are presumably calculated duties and CIF import
## value - confirm against where these tables are built.
tariff_rev <- cald %>%
  left_join(cifval, by = c("hs02", "month")) %>%
  # month names become a factor in calendar order so the sort below is
  # chronological rather than alphabetical
  mutate(month = factor(month, levels = month.name)) %>%
  # make sure the value columns are numeric before summing
  mutate(
    across(c(cald_2024, cald_2025, cifval_2024, cifval_2025), as.numeric)
  ) %>%
  arrange(hs02, month) %>%
  group_by(hs02) %>%
  # one row per hs02: totals over all months present, ignoring missing values
  summarise(
    cumsum_cald_24 = sum(cald_2024, na.rm = TRUE),
    cumsum_cald_25 = sum(cald_2025, na.rm = TRUE),
    cumsum_cifval_24 = sum(cifval_2024, na.rm = TRUE),
    cumsum_cifval_25 = sum(cifval_2025, na.rm = TRUE),
    .groups = "drop"
  )





